In [61]:
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
# from Index_Calculations import facility_index_gen
import os
import glob
import numpy as np
from statsmodels.tsa.tsatools import detrend
from os.path import join
import itertools

Import NERC labels


In [ ]:
path = os.path.join('Data storage', 'Plant NERC regions.csv')
NERC_labels = pd.read_csv(path)
NERC_labels.columns = ['plant id', 'region', 'year']

Calculate index values for each NERC region

This just uses facility data. Which is generally fine for 2015 and earlier, but it does omit DPV.


In [3]:
facility_path = os.path.join('Data storage', 'Facility gen fuels and CO2 2017-05-25.zip')
epa_path = os.path.join('Data storage', 'Derived data',
                        'Monthly EPA emissions 2017-05-25.csv')
ef_path = os.path.join('Data storage', 'Final emission factors.csv')
out_folder = os.path.join('Data storage', 'final NERC data')

In [4]:
facility = pd.read_csv(facility_path)
epa = pd.read_csv(epa_path)

Translate NERC region names

For now, just change ERCOT to TRE.


In [5]:
NERC_labels.loc[NERC_labels['region'] == 'ERCOT', 'region'] = 'TRE'

In [20]:
for nerc in ['USA', 'TRE', 'FRCC', 'NPCC', 'RFC', 'SPP',
             'SERC', 'WECC', 'ASCC', 'MRO', 'HICC']:
    # Only use data from 2015 and earlier (final EIA data)
    facility_index_gen(eia_facility=facility.loc[(facility['year'] <= 2015)], 
                         epa=epa,
                         emission_factor_path=ef_path,
                         facility_regions=NERC_labels,
                         export_folder=out_folder, export_path_ext=' '
                         + nerc, region=nerc)

Read data and create figures


In [2]:
path = os.path.join('Data storage', 'final NERC data', 'Monthly index*')
mi_fns = glob.glob(path)
path = os.path.join('Data storage', 'final NERC data', 'Monthly gen*')
mg_fns = glob.glob(path)

In [3]:
def make_gen_index(data_folder, regions, time='Monthly'):
    """
    Read and combine the region-level generation and index files
    
    inputs:
        data_folder: (str or path) path to the region-level data files
        time: (str) "Monthly", "Quarterly", or "Annual"
        
    outputs:
        gen_index: (df) a dataframe with combined generation and index data
    """
    
    index_path = os.path.join(data_folder, '{} index*'.format(time))
    gen_path = os.path.join(data_folder, '{} gen*'.format(time))
    
    # Get filenames and list of region abbreviations
    index_fns = glob.glob(index_path)
    gen_fns = glob.glob(gen_path)
    
    # Not sure why, but DC shows up in index but not gen
#     index_regions = [f.split('.')[0][-4:] for f in index_fns]
#     gen_regions = [f.split('.')[0][-4:] for f in gen_fns]
    
    
    # Read and combine files into single dataframes
    def read_combine(paths, regions):
        df_list = []
        for f, region in zip(paths, regions):
            df = pd.read_csv(f)
            df['region'] = region
            df_list.append(df)
        
        full_df = pd.concat(df_list)
        full_df.reset_index(inplace=True, drop=True)
        full_df.rename(columns={'year': 'Year'},
                       inplace=True)
        
        if 'datetime' in full_df.columns:
            full_df['datetime'] = pd.to_datetime(full_df['datetime'])
        
        return full_df
    
    index_df = read_combine(index_fns, regions)
    gen_df = read_combine(gen_fns, regions)
    
#     return index_df, gen_df
    
    # Different timescales will pivot on different columns
    gen_pivot_col = {'Monthly': ['Year', 'datetime'],
                     'Quarterly': ['Year', 'year_quarter'],
                     'Annual': ['Year']}
    
    gen_pivot = pd.pivot_table(gen_df, index=['region'] + gen_pivot_col[time], 
                               values='generation (MWh)', columns='fuel category')
    gen_pivot.reset_index(inplace=True, drop=False)
#     gen_pivot['Year'] = gen_pivot['datetime'].dt.year
    gen_pivot.replace(np.nan, 0, inplace=True)
    
    # Merge gen and index dataframes
    merge_col = {'Monthly': ['datetime'],
                 'Quarterly': ['year_quarter'],
                 'Annual': ['Year']}
    
    gen_index = pd.merge(gen_pivot, index_df[merge_col[time] + ['region', 'index (g/kWh)']], 
                         on=merge_col[time] + ['region'])
    
    return gen_index

In [4]:
data_path = os.path.join('Data storage', 'final NERC data')
# gen_index = make_gen_index(data_path, time='Monthly')
regions = ['USA', 'SPP', 'MRO', 'RFC', 'SERC', 'TRE', 'FRCC', 'WECC', 'NPCC']
annual_gen_index = make_gen_index(data_path, regions, time='Annual')

In [5]:
df_list = []
for f in mi_fns:
    region = f.split()[-1][:-4]
    df = pd.read_csv(f)
    df['region'] = region
    df_list.append(df)
full_mi = pd.concat(df_list)
full_mi.reset_index(inplace=True, drop=True)
full_mi.rename(columns={'index (g/kWh)': 'monthly index (g/kWh)'}, inplace=True)
full_mi['datetime'] = pd.to_datetime(full_mi['datetime'])

In [6]:
df_list = []
for f in mg_fns:
    region = f.split()[-1][:-4]
    df = pd.read_csv(f)
    df['region'] = region
    df_list.append(df)
full_mg = pd.concat(df_list)
full_mg.reset_index(inplace=True, drop=True)
full_mg['datetime'] = pd.to_datetime(full_mg['datetime'])

monthly_gen = pd.pivot_table(full_mg, index=['region', 'datetime'], 
                             values='generation (MWh)', columns='fuel category')
monthly_gen.reset_index(inplace=True, drop=False)
monthly_gen['Year'] = monthly_gen['datetime'].dt.year
monthly_gen.replace(np.nan, 0, inplace=True)

In [7]:
gen_index = pd.merge(monthly_gen, full_mi[['datetime', 'region', 'monthly index (g/kWh)']], 
                     on=['datetime', 'region'])
gen_index.head()


Out[7]:
region datetime Coal Hydro Natural Gas Nuclear Other Other Renewables Solar Wind Year monthly index (g/kWh)
0 ASCC 2002-01-01 51623.345 118944.230 278301.715 0.0 105692.359 1091.837 0.0 0.0 2002 524.179128
1 ASCC 2002-02-01 44698.280 111685.818 226043.409 0.0 93737.860 920.883 0.0 0.0 2002 531.953278
2 ASCC 2002-03-01 48946.119 114826.098 242509.602 0.0 95438.279 1001.716 0.0 0.0 2002 551.999629
3 ASCC 2002-04-01 43484.123 111308.793 234060.059 0.0 67238.126 951.725 0.0 0.0 2002 523.407268
4 ASCC 2002-05-01 48711.393 108503.867 220310.309 0.0 68730.377 1002.904 0.0 0.0 2002 533.840903

There seems to be a large number of facilities left out of data before 2002. Take out 2001 for now.


In [8]:
gen_index = gen_index.loc[gen_index['Year'] >= 2002]

In [9]:
# define the mean absolute deviation as a function
mad = lambda x: np.fabs(x - x.mean()).mean()

for region in gen_index['region'].unique():
    gen_index.loc[gen_index['region'] == region, 'Index variability'] = \
        gen_index.loc[gen_index['region']==region, 
                       'monthly index (g/kWh)'].rolling(window=12).std()
    
    gen_index.loc[gen_index['region'] == region, 
                   'Normalized Index variability'] = \
         gen_index.loc[gen_index['region']==region, 'Index variability'] / \
         gen_index.loc[gen_index['region']==region, 
                       'monthly index (g/kWh)'].rolling(window=12).mean()
            
    # Also calculate variability using median absolute deviation (mad)
    gen_index.loc[gen_index['region'] == region, 'Index variability (MAD)'] = \
        gen_index.loc[gen_index['region']==region, 
                       'monthly index (g/kWh)'].rolling(window=12).apply(mad)
    
    gen_index.loc[gen_index['region'] == region, 
                   'Normalized Index variability (MAD)'] = \
         gen_index.loc[gen_index['region']==region, 'Index variability (MAD)'] / \
         gen_index.loc[gen_index['region']==region, 
                       'monthly index (g/kWh)'].rolling(window=12).mean()
gen_index.tail()


Out[9]:
region datetime Coal Hydro Natural Gas Nuclear Other Other Renewables Solar Wind Year monthly index (g/kWh) Index variability Normalized Index variability Index variability (MAD) Normalized Index variability (MAD)
1831 WECC 2015-08-01 1.846815e+07 1.145648e+07 2.675591e+07 5431555.0 619411.556 2044535.468 2221255.278 4250555.439 2015 436.192584 33.203612 0.081226 27.825260 0.068069
1832 WECC 2015-09-01 1.638549e+07 9.673322e+06 2.513661e+07 5197506.0 592984.906 1868163.912 1854932.073 3225165.969 2015 438.285713 32.058926 0.078610 27.026756 0.066271
1833 WECC 2015-10-01 1.577100e+07 9.027061e+06 2.319022e+07 4002852.0 567863.847 1960396.275 1588800.044 3220602.728 2015 446.763806 31.695605 0.077769 26.809058 0.065779
1834 WECC 2015-11-01 1.376561e+07 1.022417e+07 1.879237e+07 4511483.0 554630.018 1980105.768 1473635.631 3638695.910 2015 411.262181 31.370839 0.077152 26.017113 0.063985
1835 WECC 2015-12-01 1.631912e+07 1.189232e+07 1.945283e+07 5429153.0 621373.984 2055900.008 1261369.497 4415151.642 2015 413.457361 31.344967 0.077107 25.934392 0.063797

In [10]:
fuels = ['Coal', 'Natural Gas', 'Nuclear', 'Hydro', 'Wind',
         'Solar', 'Other', 'Other Renewables']
gen_index['Total Generation (MWh)'] = gen_index.loc[:, fuels].sum(axis=1)

In [11]:
gen_index.head()


Out[11]:
region datetime Coal Hydro Natural Gas Nuclear Other Other Renewables Solar Wind Year monthly index (g/kWh) Index variability Normalized Index variability Index variability (MAD) Normalized Index variability (MAD) Total Generation (MWh)
0 ASCC 2002-01-01 51623.345 118944.230 278301.715 0.0 105692.359 1091.837 0.0 0.0 2002 524.179128 NaN NaN NaN NaN 555653.486
1 ASCC 2002-02-01 44698.280 111685.818 226043.409 0.0 93737.860 920.883 0.0 0.0 2002 531.953278 NaN NaN NaN NaN 477086.250
2 ASCC 2002-03-01 48946.119 114826.098 242509.602 0.0 95438.279 1001.716 0.0 0.0 2002 551.999629 NaN NaN NaN NaN 502721.814
3 ASCC 2002-04-01 43484.123 111308.793 234060.059 0.0 67238.126 951.725 0.0 0.0 2002 523.407268 NaN NaN NaN NaN 457042.826
4 ASCC 2002-05-01 48711.393 108503.867 220310.309 0.0 68730.377 1002.904 0.0 0.0 2002 533.840903 NaN NaN NaN NaN 447258.850

I'm going to skip this for now. Difficult to have a base year for all regions.


In [27]:
base_year = 2007

In [28]:
fuels = ['Coal', 'Natural Gas', 'Other Renewables', 'Nuclear', 'Other',
         'Solar', 'Wind', 'Hydro']
gen_index['Total gen'] = gen_index.loc[:, fuels].sum(axis=1)
for fuel in fuels:
    # New columns that are being added
    col_percent = 'percent ' + fuel
    col_change = 'change in ' + fuel

    # Calculate percent of generation from each fuel type
    gen_index[col_percent] = gen_index.loc[:, fuel] / gen_index.loc[:, 'Total gen']

    # Percent of fuel in region in base year (entire year)
    for region in gen_index['region'].unique():
        percent_fuel_base = (gen_index.loc[(gen_index['Year'] == base_year) & 
                                          (gen_index['region'] == region), fuel].sum()
                             / gen_index.loc[(gen_index['Year'] == base_year) & 
                                             (gen_index['region'] == region), 'Total gen'].sum())

        # Use percent of fuel in base year to calculate change for each region/month
        gen_index.loc[gen_index['region'] == region, 
                      col_change] = ((gen_index.loc[gen_index['region'] == region, col_percent] 
                                      - percent_fuel_base)
                                     / percent_fuel_base)
    
# Change in variability compared to average base year value
for region in gen_index['region'].unique():
    norm_variability_base = gen_index.loc[(gen_index['Year'] == base_year) & 
                                  (gen_index['region'] == region), 'Normalized Index variability'].mean()
    variability_base = gen_index.loc[(gen_index['Year'] == base_year) & 
                                  (gen_index['region'] == region), 'Index variability'].mean()
    
    gen_index.loc[gen_index['region'] == region, 
                  'change in variability'] = ((gen_index.loc[gen_index['region'] == region, 
                                                            'Index variability'] - variability_base)
                                              / variability_base)
    
    gen_index.loc[gen_index['region'] == region, 
                  'change in norm variability'] = ((gen_index.loc[gen_index['region'] == region, 
                                                            'Normalized Index variability'] - norm_variability_base)
                                                   / norm_variability_base)

In [14]:
def weighted_percent(df, fuel, year):
    all_fuels = ['Coal', 'Natural Gas', 'Nuclear', 'Other', 'Other Renewables',
                 'Wind', 'Solar', 'Hydro']
    temp = df.loc[df['Year'] == year, all_fuels]
    temp['Total'] = temp.sum(axis=1)
    
    weighted_per = temp[fuel].sum() / temp['Total'].sum() * 100
    return weighted_per

Normalized variability over time in each NERC region

Because I've only used facility data, values are most accurate through the end of 2015. After that we lose data in some regions, like most of the hydro in ERCOT. This has a big influence on results.

The rolling standard deviation and the rolling mean absolute deviation look almost the same. There aren't enough outliers to change the results much.


In [12]:
sns.set_style('white', {'axes.linewidth': 1.5,
                        'axes.grid': True})
sns.set_context('notebook', font_scale=1.2)

In [14]:
def region_facet_grid(df, plot_function, x_axis, y_axis, col_order=None,
                      suptitle='', add_legend=False, ax_labels=None,
                      FG_kwargs={}, plot_kwargs={}, context='notebook',
                      font_scale=1.2):
    sns.set_context(context, font_scale)
    g = sns.FacetGrid(df, col_order=col_order, **FG_kwargs)
    g.map(plot_function, x_axis, y_axis, **plot_kwargs)
    g.set_xticklabels(rotation=35)
    if add_legend:
        g.add_legend()
    if suptitle:
        plt.suptitle(suptitle, y=1.02, size=15)
    if col_order and 'col' in FG_kwargs:
        axes = g.axes.flatten()
        for ax, title in zip(axes, order):
            ax.set_title(title)
    if ax_labels:
        g.set_axis_labels(ax_labels)

In [63]:
order = ['USA', 'SPP', 'MRO', 'RFC', 'SERC', 'TRE', 'FRCC', 'WECC', 'NPCC']
FG_kwargs = dict(col='region',
                 hue='region',
                 col_wrap=3,
                 aspect=1.2,
                 hue_order=order)
region_facet_grid(df=gen_index, plot_function=plt.plot, x_axis='datetime',
                  y_axis='Index variability', col_order=order, 
                  suptitle='Index Variability', FG_kwargs=FG_kwargs)



In [64]:
order = ['USA', 'SPP', 'MRO', 'RFC', 'SERC', 'TRE', 'FRCC', 'WECC', 'NPCC']
FG_kwargs = dict(col='region',
                 hue='region',
                 col_wrap=3,
                 aspect=1.2,
                 hue_order=order)
region_facet_grid(df=gen_index, plot_function=plt.plot, x_axis='datetime',
                  y_axis='Normalized Index variability', col_order=order, 
                  suptitle='Normalized Index Variability', FG_kwargs=FG_kwargs)



In [18]:
order = ['USA', 'SPP', 'MRO', 'RFC', 'SERC', 'TRE', 'FRCC', 'WECC', 'NPCC']
FG_kwargs = dict(col='region',
                 hue='region',
                 col_wrap=3,
                 aspect=1.2,
                 hue_order=order,
                 ylim=(0, 1050))

region_facet_grid(df=gen_index, plot_function=plt.plot, x_axis='datetime',
                  y_axis='monthly index (g/kWh)', col_order=order, 
                  suptitle='Monthly Index', FG_kwargs=FG_kwargs,
                  context='talk', font_scale=1)

plt.savefig(join('Figures', 'Monthly index.pdf'))



In [22]:
gen_index['month'] = gen_index['datetime'].dt.month
gen_index['year'] = gen_index['datetime'].dt.year

View CO2 intensity by month, coding the year as a categorical variable.


In [38]:
order = ['USA', 'SPP', 'MRO', 'RFC', 'SERC', 'TRE', 'FRCC', 'WECC', 'NPCC']
temp = gen_index.copy()

with sns.plotting_context('poster'):

    g = sns.factorplot(x='month', y='monthly index (g/kWh)', hue='year', 
                       data=gen_index, col='region', col_wrap=3, col_order=order, 
                       palette='viridis_r', scale=0.5)
    # plt.suptitle('CO$_2$ Intensity', y=1.02, size=15)

    axes = g.axes.flatten()
    for ax, title in zip(axes, order):
        ax.set_title(title)
        ax.set_ylim(0, 1050)
    
plt.savefig(join('Figures', 'Monthly index (12-months).pdf'))


Out[38]:
<matplotlib.text.Text at 0x11ce0bd30>
Out[38]:
(0, 1050)
Out[38]:
<matplotlib.text.Text at 0x11c66aa90>
Out[38]:
(0, 1050)
Out[38]:
<matplotlib.text.Text at 0x11cd26780>
Out[38]:
(0, 1050)
Out[38]:
<matplotlib.text.Text at 0x11cc35470>
Out[38]:
(0, 1050)
Out[38]:
<matplotlib.text.Text at 0x11b6d1860>
Out[38]:
(0, 1050)
Out[38]:
<matplotlib.text.Text at 0x11a73fba8>
Out[38]:
(0, 1050)
Out[38]:
<matplotlib.text.Text at 0x11ce01e48>
Out[38]:
(0, 1050)
Out[38]:
<matplotlib.text.Text at 0x11caf1ba8>
Out[38]:
(0, 1050)
Out[38]:
<matplotlib.text.Text at 0x11ac914e0>
Out[38]:
(0, 1050)

Correlation between regions (2006-2015)


In [19]:
order = ['USA', 'SPP', 'MRO', 'RFC', 'SERC', 'TRE', 'FRCC', 'WECC', 'NPCC']
FG_kwargs = dict(hue='region',
                 col='region',
                 col_wrap=3,
                 aspect=1.2,
                 hue_order=order,
                 ylim=(-150, 150))

detrend_df = gen_index.loc[gen_index['region'].isin(order)].copy()
for col in order:
    detrend_df.loc[detrend_df['region'] == col,
                   'monthly index (g/kWh)'] = detrend(detrend_df.loc[detrend_df['region'] == col,
                                                                     'monthly index (g/kWh)'])
    
corr_coef = detrend_df.pivot_table(values='monthly index (g/kWh)',
                                   index='datetime', columns='region').corr()
corr_coef = corr_coef.iloc[1, 0]
region_facet_grid(df=detrend_df, plot_function=plt.plot, x_axis='datetime', #col_order=order,
                  add_legend=False, y_axis='monthly index (g/kWh)', col_order=order, 
                  suptitle='Monthly Index (detrended)', FG_kwargs=FG_kwargs)



In [213]:
def shift_detrend(series, n):
    'Shift a series by n periods to detrend'
    detrended = series - series.shift(n)
    return detrended

In [233]:
a = detrend(gen_index.loc[gen_index['region'] == 'MRO', 'monthly index (g/kWh)'])
b = shift_detrend(gen_index.loc[gen_index['region'] == 'MRO', 'monthly index (g/kWh)'], 1)
c = shift_detrend(gen_index.loc[gen_index['region'] == 'MRO', 'monthly index (g/kWh)'], 12)

In [230]:
import plotly
import plotly.tools as tls
import plotly.plotly as py
import plotly.graph_objs as go
from plotly.offline import iplot
plotly.offline.init_notebook_mode(connected=True)



In [234]:
# fig = plt.figure()
time = gen_index.loc[gen_index['region'] == 'MRO', 'datetime']
# plt.plot(time, a, label='Linear detrend')
# plt.plot(time, b, label='First difference')
# plt.legend(loc=0)

data = [go.Scatter(y=a, x=time, name='Linear detrend'),
        go.Scatter(y=b, x=time, name='First differences 1-month'),
        go.Scatter(y=c, x=time, name='First differences 12-months')]
iplot(data)
# plotly_fig = tls.mpl_to_plotly(fig)

# plotly.offline.iplot_mpl(fig, resize=True)



In [159]:
order = ['FRCC', 'SPP']
FG_kwargs = dict(hue='region',
#                  col='region',
#                  col_wrap=3,
                 aspect=1.5,
                 size=5,
                 hue_order=order,
                 ylim=(200, 1050))

# detrend_df = gen_index.loc[gen_index['region'].isin(order)].copy()
# for col in order:
#     detrend_df.loc[detrend_df['region'] == col,
#                    'monthly index (g/kWh)'] = detrend(detrend_df.loc[detrend_df['region'] == col,
#                                                                      'monthly index (g/kWh)'])
    
corr_coef = gen_index.pivot_table(values='monthly index (g/kWh)',
                                   index='datetime', columns='region').corr()
corr_coef = corr_coef.iloc[1, 0]
region_facet_grid(df=gen_index, plot_function=plt.plot, x_axis='datetime', #col_order=order,
                  add_legend=True, y_axis='monthly index (g/kWh)', col_order=order, 
                  suptitle='Monthly Index', FG_kwargs=FG_kwargs)
plt.figtext(.55, .85, 'Pearson corr = {:.2f}'.format(corr_coef))


Out[159]:
<matplotlib.text.Text at 0x130800c10>

In [160]:
order = ['FRCC', 'SPP']
FG_kwargs = dict(hue='region',
#                  col='region',
#                  col_wrap=3,
                 aspect=1.5,
                 size=5,
                 hue_order=order,
                 ylim=(-200, 200))

detrend_df = gen_index.loc[gen_index['region'].isin(order)].copy()
for col in order:
    detrend_df.loc[detrend_df['region'] == col,
                   'monthly index (g/kWh)'] = detrend(detrend_df.loc[detrend_df['region'] == col,
                                                                     'monthly index (g/kWh)'])
    
corr_coef = detrend_df.pivot_table(values='monthly index (g/kWh)',
                                   index='datetime', columns='region').corr()
corr_coef = corr_coef.iloc[1, 0]
region_facet_grid(df=detrend_df, plot_function=plt.plot, x_axis='datetime', #col_order=order,
                  add_legend=True, y_axis='monthly index (g/kWh)', col_order=order, 
                  suptitle='Monthly Index (detrended)', FG_kwargs=FG_kwargs)
plt.figtext(.55, .85, 'Pearson corr = {:.2f}'.format(corr_coef))


Out[160]:
<matplotlib.text.Text at 0x130866850>

In [214]:
order = ['FRCC', 'SPP']
FG_kwargs = dict(hue='region',
                 aspect=1.5,
                 size=5,
                 hue_order=order,
                 ylim=(-200, 200))

detrend_df = gen_index.loc[gen_index['region'].isin(order)].copy()
for col in order:
    detrend_df.loc[detrend_df['region'] == col,
                   'monthly index (g/kWh)'] = shift_detrend(detrend_df.loc[detrend_df['region'] == col,
                                                                     'monthly index (g/kWh)'], 1)
    
corr_coef = detrend_df.pivot_table(values='monthly index (g/kWh)',
                                   index='datetime', columns='region').corr()
corr_coef = corr_coef.iloc[1, 0]
region_facet_grid(df=detrend_df, plot_function=plt.plot, x_axis='datetime', #col_order=order,
                  add_legend=True, y_axis='monthly index (g/kWh)', col_order=order, 
                  suptitle='Monthly Index (detrended)', FG_kwargs=FG_kwargs)
plt.figtext(.55, .85, 'Pearson corr = {:.2f}'.format(corr_coef))


Out[214]:
<matplotlib.text.Text at 0x131459350>

In [55]:
regions = ['USA', 'SPP', 'MRO', 'RFC', 'SERC', 'TRE', 'FRCC', 'WECC', 'NPCC']
nerc_index = (gen_index.loc[(gen_index['region'].isin(regions)) &
                            (gen_index['Year'] >= 2006)]
              .pivot_table(values='monthly index (g/kWh)',
                           index='datetime', columns='region'))
nerc_index_detrend = pd.DataFrame(index=nerc_index.index)
for col in nerc_index.columns:
    nerc_index_detrend.loc[:, col] = detrend(nerc_index.loc[:, col])

In [56]:
nerc_index_detrend.head()


Out[56]:
FRCC MRO NPCC RFC SERC SPP TRE USA WECC
datetime
2006-01-01 2.941309 -1.366440 -6.566193 -27.637832 -38.469825 46.829331 26.573113 -16.700268 -25.886488
2006-02-01 -15.046898 18.888801 -7.628336 -9.782690 -19.224405 -5.195642 3.905515 -10.321605 -30.272832
2006-03-01 -27.799179 -9.809718 -6.578126 -0.850255 -19.868925 -14.191493 -22.764873 -12.727337 -17.273519
2006-04-01 -19.451810 -6.993082 -24.452287 -20.474439 -35.777829 -55.733694 -38.098771 -35.247775 -68.285780
2006-05-01 14.763387 15.762760 -31.344860 -35.228484 -2.934095 -41.785755 -24.960402 -28.510237 -77.522157

Try before 2013 and after 2013 to see if there is a difference.


In [58]:
order = ['USA', 'SPP', 'MRO', 'RFC', 'SERC', 'TRE', 'FRCC', 'WECC', 'NPCC']
corr = nerc_index_detrend.loc[(nerc_index_detrend.index.year < 2010) &
                              (nerc_index_detrend.index.year >= 2007), order[1:]].corr()
mask = np.zeros_like(corr)
mask[np.triu_indices_from(mask)] = True
sns.heatmap(corr, vmax = 0.9, mask=mask, square=True, annot=True,
            annot_kws={'size': 10}, cmap='magma_r')
plt.title('2007-2009')
plt.savefig(join('Figures', '2007-2009 correlation.pdf'), bbox_inches='tight')


Out[58]:
<matplotlib.axes._subplots.AxesSubplot at 0x1172a8240>
Out[58]:
<matplotlib.text.Text at 0x117ce38d0>

In [59]:
order = ['USA', 'SPP', 'MRO', 'RFC', 'SERC', 'TRE', 'FRCC', 'WECC', 'NPCC']
corr = nerc_index_detrend.loc[(nerc_index_detrend.index.year < 2013) &
                              (nerc_index_detrend.index.year >= 2010), order[1:]].corr()
mask = np.zeros_like(corr)
mask[np.triu_indices_from(mask)] = True
sns.heatmap(corr, vmax = 0.9, mask=mask, square=True, annot=True,
            annot_kws={'size': 10}, cmap='magma_r')
plt.title('2010-2012')
plt.savefig(join('Figures', '2010-2012 correlation.pdf'), bbox_inches='tight')


Out[59]:
<matplotlib.axes._subplots.AxesSubplot at 0x11b9eafd0>
Out[59]:
<matplotlib.text.Text at 0x11b9ff550>

In [60]:
order = ['USA', 'SPP', 'MRO', 'RFC', 'SERC', 'TRE', 'FRCC', 'WECC', 'NPCC']
corr = nerc_index_detrend.loc[(nerc_index_detrend.index.year >= 2013), order[1:]].corr()
mask = np.zeros_like(corr)
mask[np.triu_indices_from(mask)] = True
sns.heatmap(corr, vmax = 0.9, mask=mask, square=True, annot=True,
            annot_kws={'size': 10}, cmap='magma_r')
plt.title('2013-2015')
plt.savefig(join('Figures', '2013-2015 correlation.pdf'), bbox_inches='tight')


Out[60]:
<matplotlib.axes._subplots.AxesSubplot at 0x11bd28630>
Out[60]:
<matplotlib.text.Text at 0x11bd2ce80>

In [87]:
import itertools

In [85]:
def nerc_correlation(df, regions=[], years=[], detrend=True):
    "Pass in non-detrended data. Detrend for just the years of interest"
    
    df_temp = df.loc[(df['region'].isin(regions)) &
                     (df['Year'].isin(years))].copy()
    if detrend:
        for region in regions:
            df_temp.loc[df_temp['region'] == region,
                       'monthly index (g/kWh)'] = detrend(df_temp.loc[df_temp['region'] == region,
                                                                         'monthly index (g/kWh)'])
    corr_df = df_temp.pivot_table(values='monthly index (g/kWh)',
                                  index='datetime', columns='region').corr()
    return corr_df.iloc[1, 0]

In [95]:
years = range(2006, 2016)
df_list = []
for regions in list(itertools.permutations(order[1:], 2)):
    corr_df = pd.DataFrame(columns=['Year', 'Region1', 'Region2', 'Correlation'],
                           index=years)
    corr_df['Year'] = years
    corr_df['Region1'] = regions[0]
    corr_df['Region2'] = regions[1]
    for year in range(2006, 2016): #[years[x:x+2] for x in xrange(0, len(years), 2)]:
        result = nerc_correlation(gen_index, regions=regions, years=[year], detrend=False)
        corr_df.loc[year, 'Correlation'] = result
    df_list.append(corr_df)

In [96]:
g = sns.FacetGrid(pd.concat(df_list), row='Region1', col='Region2')
g.map(plt.plot, 'Year', 'Correlation')


Out[96]:
<seaborn.axisgrid.FacetGrid at 0x11781ae50>

Percent of fuel in each region


In [29]:
all_fuels = ['Coal', 'Natural Gas', 'Nuclear', 'Other', 'Other Renewables',
                 'Wind', 'Solar', 'Hydro']
value_cols = ['percent {}'.format(fuel) for fuel in all_fuels]

percent_gen_df = pd.melt(gen_index, id_vars=['region', 'datetime'], 
        value_vars=value_cols, value_name='percent generation',
                        var_name='fuel')
percent_gen_df['fuel'] = percent_gen_df['fuel'].map(lambda x: x.split()[-1])
percent_gen_df['fuel'].replace('Renewables', 'Other Renewables', inplace=True)
percent_gen_df['fuel'].replace('Gas', 'Natural Gas', inplace=True)

In [30]:
gen_index.head()


Out[30]:
region datetime Coal Hydro Natural Gas Nuclear Other Other Renewables Solar Wind ... percent Other change in Other percent Solar change in Solar percent Wind change in Wind percent Hydro change in Hydro change in variability change in norm variability
0 ASCC 2002-01-01 51623.345 118944.230 278301.715 0.0 105692.359 1091.837 0.0 0.0 ... 0.190213 0.284681 0.0 NaN 0.0 -1.0 0.214062 0.130866 NaN NaN
1 ASCC 2002-02-01 44698.280 111685.818 226043.409 0.0 93737.860 920.883 0.0 0.0 ... 0.196480 0.327009 0.0 NaN 0.0 -1.0 0.234100 0.236724 NaN NaN
2 ASCC 2002-03-01 48946.119 114826.098 242509.602 0.0 95438.279 1001.716 0.0 0.0 ... 0.189843 0.282185 0.0 NaN 0.0 -1.0 0.228409 0.206659 NaN NaN
3 ASCC 2002-04-01 43484.123 111308.793 234060.059 0.0 67238.126 951.725 0.0 0.0 ... 0.147116 -0.006393 0.0 NaN 0.0 -1.0 0.243541 0.286602 NaN NaN
4 ASCC 2002-05-01 48711.393 108503.867 220310.309 0.0 68730.377 1002.904 0.0 0.0 ... 0.153670 0.037876 0.0 NaN 0.0 -1.0 0.242597 0.281616 NaN NaN

5 rows × 38 columns


In [31]:
percent_gen_df.head()


Out[31]:
region datetime fuel percent generation
0 ASCC 2002-01-01 Coal 0.092906
1 ASCC 2002-02-01 Coal 0.093690
2 ASCC 2002-03-01 Coal 0.097362
3 ASCC 2002-04-01 Coal 0.095142
4 ASCC 2002-05-01 Coal 0.108911

In [32]:
order = ['USA', 'SPP', 'MRO', 'RFC', 'SERC', 'TRE', 'FRCC', 'WECC', 'NPCC']
fuel_order = ['Coal', 'Natural Gas', 'Nuclear', 'Hydro', 'Wind', 'Solar',
              'Other', 'Other Renewables']
FG_kwargs = dict(hue='fuel',
                 col='region',
                 col_wrap=3,
                 aspect=1.2,
                 hue_order=fuel_order)

region_facet_grid(df=percent_gen_df, plot_function=plt.plot, x_axis='datetime',
                  add_legend=True, y_axis='percent generation', col_order=order, 
                  suptitle='', FG_kwargs=FG_kwargs,
                  context='talk', font_scale=1)

plt.savefig(join('Figures', 'Generation by fuel.pdf'))



In [46]:
percent_gen_df['month'] = percent_gen_df['datetime'].dt.month
percent_gen_df['year'] = percent_gen_df['datetime'].dt.year

In [47]:
percent_gen_df.head()


Out[47]:
region datetime fuel percent generation month year
0 ASCC 2002-01-01 Coal 0.092906 1 2002
1 ASCC 2002-02-01 Coal 0.093690 2 2002
2 ASCC 2002-03-01 Coal 0.097362 3 2002
3 ASCC 2002-04-01 Coal 0.095142 4 2002
4 ASCC 2002-05-01 Coal 0.108911 5 2002

Looking at seasonal variation in generation across different regions and timeframes.

All regions, from 2013 through 2015


In [35]:
order = ['USA', 'SPP', 'MRO', 'RFC', 'SERC', 'TRE', 'FRCC', 'WECC', 'NPCC']
fuels = ['Coal', 'Natural Gas', 'Nuclear', 'Hydro', 'Wind', 'Solar']
temp_df = percent_gen_df.loc[(percent_gen_df.fuel.isin(fuels)) & 
                             (percent_gen_df.datetime.dt.year >= 2012)]
sns.factorplot(x='month', y='percent generation', hue='fuel', data=temp_df,
              col='region', col_wrap=3, col_order=order, hue_order=fuels)


Out[35]:
<seaborn.axisgrid.FacetGrid at 0x112980d50>

All regions, before 2013


In [36]:
order = ['USA', 'SPP', 'MRO', 'RFC', 'SERC', 'TRE', 'FRCC', 'WECC', 'NPCC']
fuels = ['Coal', 'Natural Gas', 'Nuclear', 'Hydro', 'Wind', 'Solar']
temp_df = percent_gen_df.loc[(percent_gen_df.fuel.isin(fuels)) & 
                             (percent_gen_df.datetime.dt.year <= 2012)]
sns.factorplot(x='month', y='percent generation', hue='fuel', data=temp_df,
              col='region', col_wrap=3, col_order=order, hue_order=fuels)


Out[36]:
<seaborn.axisgrid.FacetGrid at 0x11507e9d0>

Only coal, NG, and wind in SPP. All years in each facet


In [49]:
order = ['SPP']
fuels = ['Coal', 'Natural Gas', 'Nuclear', 'Hydro', 'Wind', 'Solar']
temp_df = percent_gen_df.loc[(percent_gen_df.fuel.isin(['Wind', 'Coal', 'Natural Gas'])) & 
                             (percent_gen_df.region.isin(order))]
sns.factorplot(x='month', y='percent generation', hue='year', data=temp_df,
               col='fuel', palette='viridis')
# sns.factorplot(x='month', y='percent generation', hue='fuel', data=temp_df,
#               col='year', col_wrap=3, hue_order=fuels)


Out[49]:
<seaborn.axisgrid.FacetGrid at 0x1127f0cd0>

In [109]:
order = ['SPP', 'TRE', 'MRO']
fuels = ['Coal', 'Natural Gas', 'Nuclear', 'Hydro', 'Wind', 'Solar']
temp_df = percent_gen_df.loc[(percent_gen_df.fuel.isin(['Wind', 'Coal', 'Natural Gas'])) & 
                             (percent_gen_df.region.isin(order)) &
                             (percent_gen_df.year >=2007)].copy()
def early_late(x):
    if x < 2010:
        return 'Early'
    elif x < 2013:
        return 'Mid'
    elif x >= 2013:
        return 'Late'
    else:
        return ValueError

temp_df.loc[:, 'timeframe'] = temp_df.loc[:, 'year'].map(early_late)
merge_cols = ['region', 'month', 'year']
temp_df = pd.merge(temp_df, gen_index[merge_cols + ['Total gen']],
                   on=merge_cols)
temp_df['generation'] = (temp_df.loc[:, 'percent generation']
                         * temp_df.loc[:, 'Total gen'] / 1e6)

In [110]:
# a is the list of colors to get wind purple
a = sns.color_palette('tab10')[0:2]
a.append(sns.color_palette('tab10')[4])

In [111]:
with sns.plotting_context(font_scale=1.2):
    g = sns.FacetGrid(temp_df, col='timeframe', row='region',
                      col_order=['Early', 'Mid', 'Late'], row_order=order)
    g.map_dataframe(sns.tsplot, time='month', unit='year', value='percent generation', 
                    condition='fuel', err_style='unit_traces', color=a).add_legend()
    g.set_axis_labels('Month', 'Percent Generation')
    titles = ['{}, {}'.format(a, b) for (a, b) in 
              list(itertools.product(order, ['2007-2009', '2010-2012', '2013-2015']))]
    axes = g.axes.flatten()
    for ax, title in zip(axes, titles):
        ax.set_title(title)
# plt.savefig(join('Figures', 'Adding wind.pdf'), bbox_inches='tight')


/Users/Home/anaconda/envs/py36/lib/python3.6/site-packages/seaborn/timeseries.py:183: UserWarning: The tsplot function is deprecated and will be removed or replaced (in a substantially altered version) in a future release.
  warnings.warn(msg, UserWarning)
Out[111]:
<seaborn.axisgrid.FacetGrid at 0x1217ff908>
Out[111]:
<seaborn.axisgrid.FacetGrid at 0x1217ff908>
Out[111]:
<matplotlib.text.Text at 0x1245a3fd0>
Out[111]:
<matplotlib.text.Text at 0x1245eafd0>
Out[111]:
<matplotlib.text.Text at 0x1246592e8>
Out[111]:
<matplotlib.text.Text at 0x1246b2128>
Out[111]:
<matplotlib.text.Text at 0x124720710>
Out[111]:
<matplotlib.text.Text at 0x124771860>
Out[111]:
<matplotlib.text.Text at 0x1247e2e10>
Out[111]:
<matplotlib.text.Text at 0x124832ac8>
Out[111]:
<matplotlib.text.Text at 0x1248af748>

In [100]:
temp_df.groupby(['region',  'fuel', 'timeframe']).mean()


Out[100]:
percent generation month year
region fuel timeframe
MRO Coal Early 0.630208 6.5 2011.0
Late 0.590813 6.5 2014.0
Natural Gas Early 0.037343 6.5 2011.0
Late 0.051716 6.5 2014.0
Wind Early 0.122331 6.5 2011.0
Late 0.166540 6.5 2014.0
SPP Coal Early 0.559864 6.5 2011.0
Late 0.517152 6.5 2014.0
Natural Gas Early 0.306936 6.5 2011.0
Late 0.274068 6.5 2014.0
Wind Early 0.055447 6.5 2011.0
Late 0.126280 6.5 2014.0
TRE Coal Early 0.342231 6.5 2011.0
Late 0.310446 6.5 2014.0
Natural Gas Early 0.447095 6.5 2011.0
Late 0.469053 6.5 2014.0
Wind Early 0.080329 6.5 2011.0
Late 0.101316 6.5 2014.0

In [113]:
with sns.plotting_context(font_scale=1.2):
    g = sns.FacetGrid(temp_df, col='timeframe', row='region',
                      col_order=['Early', 'Mid', 'Late'], row_order=order)
    g.map_dataframe(sns.tsplot, time='month', unit='year', value='generation', 
                    condition='fuel', err_style='unit_traces', color=a).add_legend()
    g.set_axis_labels('Month', 'Million MWh')
    titles = ['{}, {}'.format(a, b) for (a, b) in 
              list(itertools.product(order, ['2007-2009', '2010-2012', '2013-2015']))]
    axes = g.axes.flatten()
    for ax, title in zip(axes, titles):
        ax.set_title(title)
        ax.set_xticks([1, 4, 7, 10])
# plt.savefig(join('Figures', 'Adding wind.pdf'), bbox_inches='tight')


/Users/Home/anaconda/envs/py36/lib/python3.6/site-packages/seaborn/timeseries.py:183: UserWarning: The tsplot function is deprecated and will be removed or replaced (in a substantially altered version) in a future release.
  warnings.warn(msg, UserWarning)
Out[113]:
<seaborn.axisgrid.FacetGrid at 0x1215ae908>
Out[113]:
<seaborn.axisgrid.FacetGrid at 0x1215ae908>
Out[113]:
<matplotlib.text.Text at 0x1215ecc50>
Out[113]:
[<matplotlib.axis.XTick at 0x12524e5c0>,
 <matplotlib.axis.XTick at 0x1240eb080>,
 <matplotlib.axis.XTick at 0x1255a6fd0>,
 <matplotlib.axis.XTick at 0x12595e6d8>]
Out[113]:
<matplotlib.text.Text at 0x1252b8438>
Out[113]:
[<matplotlib.axis.XTick at 0x1252a0828>,
 <matplotlib.axis.XTick at 0x125290978>,
 <matplotlib.axis.XTick at 0x1240ab978>,
 <matplotlib.axis.XTick at 0x1214f7eb8>]
Out[113]:
<matplotlib.text.Text at 0x1252f8cf8>
Out[113]:
[<matplotlib.axis.XTick at 0x1252f3b00>,
 <matplotlib.axis.XTick at 0x125290240>,
 <matplotlib.axis.XTick at 0x1252b15c0>,
 <matplotlib.axis.XTick at 0x120db1c50>]
Out[113]:
<matplotlib.text.Text at 0x12537eb00>
Out[113]:
[<matplotlib.axis.XTick at 0x12534db70>,
 <matplotlib.axis.XTick at 0x125362860>,
 <matplotlib.axis.XTick at 0x12528ba58>,
 <matplotlib.axis.XTick at 0x120d764a8>]
Out[113]:
<matplotlib.text.Text at 0x1253df278>
Out[113]:
[<matplotlib.axis.XTick at 0x1253d0978>,
 <matplotlib.axis.XTick at 0x125378b70>,
 <matplotlib.axis.XTick at 0x125319f60>,
 <matplotlib.axis.XTick at 0x1214adb70>]
Out[113]:
<matplotlib.text.Text at 0x125448a20>
Out[113]:
[<matplotlib.axis.XTick at 0x12541b278>,
 <matplotlib.axis.XTick at 0x125413f28>,
 <matplotlib.axis.XTick at 0x125367ba8>,
 <matplotlib.axis.XTick at 0x1215b7668>]
Out[113]:
<matplotlib.text.Text at 0x1254b34a8>
Out[113]:
[<matplotlib.axis.XTick at 0x125485c50>,
 <matplotlib.axis.XTick at 0x12549a7b8>,
 <matplotlib.axis.XTick at 0x12542dc88>,
 <matplotlib.axis.XTick at 0x1214b6e80>]
Out[113]:
<matplotlib.text.Text at 0x125530048>
Out[113]:
[<matplotlib.axis.XTick at 0x1254f1ac8>,
 <matplotlib.axis.XTick at 0x12551af98>,
 <matplotlib.axis.XTick at 0x1254f16a0>,
 <matplotlib.axis.XTick at 0x1215aee10>]
Out[113]:
<matplotlib.text.Text at 0x12557da20>
Out[113]:
[<matplotlib.axis.XTick at 0x1254f1e10>,
 <matplotlib.axis.XTick at 0x125570f28>,
 <matplotlib.axis.XTick at 0x1254f1cc0>,
 <matplotlib.axis.XTick at 0x1214e3278>]

Look at the growth in average monthly wind generation across the two timeframes, and the change in natural gas/coal generation.


In [103]:
temp_df.groupby(['region',  'fuel', 'timeframe']).mean()


Out[103]:
percent generation month year Total gen generation
region fuel timeframe
MRO Coal Early 0.630208 6.5 2011.0 1.896983e+07 11.984091
Late 0.590813 6.5 2014.0 1.879558e+07 11.138114
Natural Gas Early 0.037343 6.5 2011.0 1.896983e+07 0.736706
Late 0.051716 6.5 2014.0 1.879558e+07 0.978960
Wind Early 0.122331 6.5 2011.0 1.896983e+07 2.272135
Late 0.166540 6.5 2014.0 1.879558e+07 3.102439
SPP Coal Early 0.559864 6.5 2011.0 1.916001e+07 10.613247
Late 0.517152 6.5 2014.0 1.908230e+07 9.882392
Natural Gas Early 0.306936 6.5 2011.0 1.916001e+07 6.062922
Late 0.274068 6.5 2014.0 1.908230e+07 5.262823
Wind Early 0.055447 6.5 2011.0 1.916001e+07 1.024581
Late 0.126280 6.5 2014.0 1.908230e+07 2.350254
TRE Coal Early 0.342231 6.5 2011.0 2.922892e+07 9.969315
Late 0.310446 6.5 2014.0 3.056596e+07 9.510300
Natural Gas Early 0.447095 6.5 2011.0 2.922892e+07 13.227784
Late 0.469053 6.5 2014.0 3.056596e+07 14.417579
Wind Early 0.080329 6.5 2011.0 2.922892e+07 2.264802
Late 0.101316 6.5 2014.0 3.056596e+07 3.012674

In [126]:
map_timeframe = {'Early': 0,
                 'Mid': 0.5,
                 'Late': 1}

data = (temp_df.groupby(['region', 'fuel', 'timeframe'])
                   .std()
                   .reset_index())
data['frame'] = data['timeframe'].map(map_timeframe)
data.sort_values(['region', 'fuel', 'frame'], inplace=True)

g = sns.FacetGrid(data, hue='fuel', col='region',
                  col_order=['SPP', 'TRE', 'MRO'], palette=a)
g.map(plt.plot, 'frame', 'generation').add_legend()
g.set_axis_labels('Timeframe', 'Variability')
axes = g.axes.flatten()
for ax in axes:
    ax.set_xticks([0, 0.5, 1])
    ax.set_xticklabels([' 2007 -\n 2009  ', '2010 -\n2012  ',
                        '2013 - \n2015   '])
    ax.set_xlim(-.1, 1.1)


Out[126]:
<seaborn.axisgrid.FacetGrid at 0x1271bb908>
Out[126]:
<seaborn.axisgrid.FacetGrid at 0x1271bb908>
Out[126]:
[<matplotlib.axis.XTick at 0x123404e80>,
 <matplotlib.axis.XTick at 0x11bf36c88>,
 <matplotlib.axis.XTick at 0x1264befd0>]
Out[126]:
[<matplotlib.text.Text at 0x120d82d30>,
 <matplotlib.text.Text at 0x12488be80>,
 <matplotlib.text.Text at 0x126cc1128>]
Out[126]:
(-0.1, 1.1)
Out[126]:
[<matplotlib.axis.XTick at 0x126be1358>,
 <matplotlib.axis.XTick at 0x126bdb668>,
 <matplotlib.axis.XTick at 0x1260674a8>]
Out[126]:
[<matplotlib.text.Text at 0x12674fcf8>,
 <matplotlib.text.Text at 0x126761748>,
 <matplotlib.text.Text at 0x126ce95f8>]
Out[126]:
(-0.1, 1.1)
Out[126]:
[<matplotlib.axis.XTick at 0x1260c5fd0>,
 <matplotlib.axis.XTick at 0x123409e48>,
 <matplotlib.axis.XTick at 0x126761710>]
Out[126]:
[<matplotlib.text.Text at 0x12675ae80>,
 <matplotlib.text.Text at 0x1260adb38>,
 <matplotlib.text.Text at 0x126cd7470>]
Out[126]:
(-0.1, 1.1)

Annual index values


In [175]:
annual_gen_index.head()


Out[175]:
region Year Coal Hydro Natural Gas Nuclear Other Other Renewables Solar Wind index (g/kWh)
0 FRCC 2001 3.635751e+08 2.204357e+07 5.138395e+07 187650128.0 8.122448e+06 0.000000e+00 0.0 0.0 609.802294
1 FRCC 2002 4.412558e+08 2.651508e+07 1.147544e+08 252761123.0 1.340431e+07 1.920941e+07 0.0 4068.0 580.658585
2 FRCC 2003 4.414591e+08 4.235141e+07 8.931378e+07 246944988.0 1.778760e+07 1.318030e+07 0.0 3933.0 579.308184
3 FRCC 2004 4.517296e+08 3.521664e+07 1.098346e+08 256364920.0 2.040076e+07 1.683418e+07 0.0 3813.0 562.939848
4 FRCC 2005 4.805494e+08 3.418739e+07 1.181784e+08 251513134.0 1.651148e+07 1.646854e+07 0.0 3339.0 574.508693

In [181]:
annual_gen_index.loc[annual_gen_index['region'] == 'SERC']


Out[181]:
region Year Coal Hydro Natural Gas Nuclear Other Other Renewables Solar Wind index (g/kWh)
55 SERC 2001 7.151246e+06 1.829287e+07 2.943617e+06 34726301.0 7.586375e+06 1.773350e+05 0.00 12133.000 218.322689
56 SERC 2002 4.198732e+07 2.942033e+07 8.175643e+07 73561762.0 2.919109e+07 4.808592e+06 0.00 91998.000 395.725502
57 SERC 2003 4.278476e+07 2.996434e+07 7.289092e+07 75455695.0 3.876786e+07 3.908553e+06 0.00 52030.001 405.117318
58 SERC 2004 4.191305e+07 2.989485e+07 7.522369e+07 77153595.0 4.009624e+07 4.774855e+06 0.00 109016.000 394.775591
59 SERC 2005 4.078794e+07 3.309260e+07 8.124198e+07 77007763.0 4.355358e+07 5.517152e+06 0.00 114475.999 389.669146
60 SERC 2006 4.059439e+07 3.539775e+07 9.493770e+07 79147382.0 1.509632e+07 8.562950e+06 0.00 666058.999 347.256614
61 SERC 2007 4.147160e+07 3.045338e+07 9.940021e+07 79426397.0 1.735074e+07 8.980801e+06 0.00 943058.002 352.396734
62 SERC 2008 3.797199e+07 3.452639e+07 9.509392e+07 78756123.0 1.078319e+07 8.844715e+06 80.19 1406547.310 324.155219
63 SERC 2009 2.719829e+07 3.569982e+07 9.280932e+07 79715298.0 8.245965e+06 8.467415e+06 42.57 2644983.520 288.293521
64 SERC 2010 2.766289e+07 3.264042e+07 1.075385e+08 80230180.0 6.840508e+06 8.565847e+06 824.26 3155407.470 299.926632
65 SERC 2011 1.623389e+07 3.571285e+07 1.139487e+08 76978228.0 5.783529e+06 7.880439e+06 13022.47 3686049.690 273.350345
66 SERC 2012 8.096811e+06 3.120608e+07 1.219260e+08 76890506.0 5.211014e+06 8.276991e+06 92988.70 4259721.100 261.528087
67 SERC 2013 1.086327e+07 3.181064e+07 1.064648e+08 81938883.0 6.152340e+06 8.645710e+06 203771.33 5419873.740 254.083076
68 SERC 2014 9.601994e+06 3.265607e+07 1.019295e+08 79877244.0 8.389898e+06 8.671962e+06 455423.30 6009327.150 249.972043
69 SERC 2015 6.144066e+06 3.201717e+07 1.102206e+08 76492798.0 7.738162e+06 8.438667e+06 659719.83 6246282.140 250.736220

In [177]:
regions = ['USA', 'SPP', 'MRO', 'RFC', 'SERC', 'TRE', 'FRCC', 'WECC', 'NPCC']
for region in regions:
    temp = annual_gen_index.loc[annual_gen_index['region'] == region]
    start_year = temp.loc[:, 'Year'].values[0]
    start = temp.loc[:, 'index (g/kWh)'].values[0]
    end = temp.loc[:, 'index (g/kWh)'].values[-1]
    
    change = (end - start) / start * 100
    
    print '{} declined {:.1f}%, from {:.1f} in {} to {:.1f}'.format(region, change,
                                                            start, start_year,
                                                            end)


USA declined -9.4%, from 538.2 in 2002 to 487.7
SPP declined -53.5%, from 1003.9 in 2001 to 467.2
MRO declined -16.5%, from 783.4 in 2002 to 654.3
RFC declined -27.5%, from 838.3 in 2005 to 608.0
SERC declined 14.8%, from 218.3 in 2001 to 250.7
TRE declined -17.5%, from 649.3 in 2006 to 535.9
FRCC declined -20.0%, from 609.8 in 2001 to 487.5
WECC declined -34.4%, from 962.7 in 2001 to 631.6
NPCC declined -24.6%, from 642.2 in 2001 to 484.4

In [186]:
gen_index.loc[gen_index['region'] == 'USA', ['datetime', 'monthly index (g/kWh)']]


Out[186]:
datetime monthly index (g/kWh)
1500 2002-01-01 607.894450
1501 2002-02-01 604.119188
1502 2002-03-01 610.360180
1503 2002-04-01 603.583763
1504 2002-05-01 604.596839
1505 2002-06-01 609.809488
1506 2002-07-01 624.623198
1507 2002-08-01 629.097980
1508 2002-09-01 631.823110
1509 2002-10-01 634.634598
1510 2002-11-01 621.886954
1511 2002-12-01 622.543964
1512 2003-01-01 635.565217
1513 2003-02-01 629.944923
1514 2003-03-01 618.691987
1515 2003-04-01 599.940766
1516 2003-05-01 600.320519
1517 2003-06-01 613.410518
1518 2003-07-01 625.947863
1519 2003-08-01 628.096899
1520 2003-09-01 628.683803
1521 2003-10-01 634.708848
1522 2003-11-01 629.954687
1523 2003-12-01 623.574465
1524 2004-01-01 619.529728
1525 2004-02-01 609.232278
1526 2004-03-01 595.568898
1527 2004-04-01 588.474010
1528 2004-05-01 593.796666
1529 2004-06-01 597.017845
... ... ...
1638 2013-07-01 534.360601
1639 2013-08-01 536.694235
1640 2013-09-01 532.405165
1641 2013-10-01 515.384823
1642 2013-11-01 504.990793
1643 2013-12-01 520.314670
1644 2014-01-01 531.150742
1645 2014-02-01 545.220152
1646 2014-03-01 517.090251
1647 2014-04-01 482.899620
1648 2014-05-01 493.290414
1649 2014-06-01 513.403656
1650 2014-07-01 527.801391
1651 2014-08-01 535.260200
1652 2014-09-01 516.448597
1653 2014-10-01 494.047370
1654 2014-11-01 493.353745
1655 2014-12-01 489.258768
1656 2015-01-01 493.451214
1657 2015-02-01 504.276261
1658 2015-03-01 469.818465
1659 2015-04-01 443.563635
1660 2015-05-01 474.220574
1661 2015-06-01 506.130683
1662 2015-07-01 514.700299
1663 2015-08-01 513.834442
1664 2015-09-01 503.734010
1665 2015-10-01 474.697732
1666 2015-11-01 448.987993
1667 2015-12-01 431.867310

168 rows × 2 columns


In [187]:
df_list = []
for f in ai_fns:
    region = f.split()[-1][:-4]
    df = pd.read_csv(f)
    df['region'] = region
    df_list.append(df)
full_ai = pd.concat(df_list)
full_ai.reset_index(inplace=True, drop=True)
# full_ai.rename(columns={'index (g/kWh)': 'monthly index (g/kWh)'}, inplace=True)

In [190]:
df_list = []
for f in ag_fns:
    region = f.split()[-1][:-4]
    df = pd.read_csv(f)
    df['region'] = region
    df_list.append(df)
full_ag = pd.concat(df_list)
full_ag.reset_index(inplace=True, drop=True)
# full_ag['datetime'] = pd.to_datetime(full_ag['datetime'])

annual_gen = pd.pivot_table(full_ag, index=['region', 'year'], 
                             values='generation (MWh)', columns='fuel category')
annual_gen.reset_index(inplace=True, drop=False)
annual_gen.replace(np.nan, 0, inplace=True)

In [198]:
annual_gen_index = pd.merge(annual_gen, full_ai[['year', 'region', 'index (g/kWh)']], 
                     on=['year', 'region'])
annual_gen_index.head()


Out[198]:
region year Coal Hydro Natural Gas Nuclear Other Other Renewables Solar Wind index (g/kWh)
0 ASCC 2002 5.752878e+05 1439351.001 2.797913e+06 0.0 931895.38600 12155.00000 0.0 0.000 538.205197
1 ASCC 2003 4.567480e+05 1582536.373 3.205922e+06 0.0 843198.12100 5776.58000 0.0 0.000 536.534989
2 ASCC 2004 6.489797e+05 1498020.009 3.623433e+06 0.0 747068.07261 9216.50560 0.0 0.000 535.042630
3 ASCC 2005 1.920282e+06 1463942.006 3.723626e+06 0.0 847097.54313 5255.56792 0.0 589.000 633.494336
4 ASCC 2006 6.174190e+05 1223607.006 4.057730e+06 0.0 767987.90118 6664.50779 0.0 788.001 560.168854

In [202]:
regions = ['USA', 'SPP', 'MRO', 'RFC', 'SERC', 'TRE', 'FRCC', 'WECC', 'NPCC']
for region in regions:
    temp = annual_gen_index.loc[annual_gen_index['region'] == region]
    start_year = temp.loc[:, 'year'].values[0]
    start = temp.loc[:, 'index (g/kWh)'].values[0]
    if region in ['FRCC', 'NPCC', 'USA']:
        start = temp.loc[:, 'index (g/kWh)'].values[1]
        start_year = temp.loc[:, 'year'].values[1]
    
    end = temp.loc[:, 'index (g/kWh)'].values[-1]
    
    per_change = (start - end) / start * 100
    change = start - end
    rate = change / (2015 - start_year)
    
    print '''{} declined {:.1f} ({:.1f}%), from {:.1f} in {} to {:.1f},
    a rate of {:.1f}'''.format(region, change, per_change, start, start_year,
                           end, rate)


USA declined 133.8 (21.7%), from 617.6 in 2002 to 483.8,
    a rate of 10.3
SPP declined 331.1 (34.4%), from 962.7 in 2001 to 631.6,
    a rate of 23.6
MRO declined 230.3 (27.5%), from 838.3 in 2005 to 608.0,
    a rate of 23.0
RFC declined 113.3 (17.5%), from 649.3 in 2006 to 535.9,
    a rate of 12.6
SERC declined 122.3 (20.0%), from 609.8 in 2001 to 487.5,
    a rate of 8.7
TRE declined 157.8 (24.6%), from 642.2 in 2001 to 484.4,
    a rate of 11.3
FRCC declined 153.7 (24.7%), from 620.9 in 2002 to 467.2,
    a rate of 11.8
WECC declined 54.5 (11.8%), from 463.1 in 2002 to 408.6,
    a rate of 4.2
NPCC declined 145.0 (36.6%), from 395.7 in 2002 to 250.7,
    a rate of 11.2

In [ ]: